The (interactive) correlation heatmap reveals very high correlations among amino acids and among triglyceride (TG) compounds.
# Interactive correlation heatmap of the first 230 columns of `df` for the
# rows diagnosed as "Probable AD"; the widget is also written to heatmap.html.
load("data/data.Rdata")
X <- df[df$Diagnosis == "Probable AD", 1:230]
# Pairwise correlations, rounded to two decimals.
C <- round(cor(X), 2)
# No dendrograms are requested, so rows and columns keep their input order.
# `reorderfun` is intentionally left at its default: it must be a
# function(d, w), and it is only consulted when a dendrogram is computed.
heatmaply_cor(
  C,
  color = viridis,
  plot_method = "plotly",
  dendrogram = "none",
  main = "Correlation Heatmap",
  file = "heatmap.html",
  colorbar_thickness = 15,
  colorbar_len = 0.5
)
Performing a Global Test on the serum metabolites of AD patients, correcting for sex (H0: E4 status has no effect on metabolite levels; Ha: it has an effect), yields a significant difference (p = 0.029) between E4 carriers and non-carriers. The presence of ApoE4, correcting for sex, seems to have an effect on the metabolites shown in the plot below. The most significantly affected metabolites are triglycerides and diglycerides (positively) and γ-aminobutyric acid (negatively).
# Global test of ApoE4 carrier status (E4) within the "Probable AD" subset.
# NOTE(review): data/clinical.Rdata presumably provides the `clinical` data
# frame used below -- confirm.
load("data/clinical.Rdata")
# Drop the V_MMSE column from the clinical covariates.
clinical$V_MMSE <- NULL
# Standardise the first 230 columns of `df` in place (this persists for later chunks).
df[,1:230] <- scale(df[,1:230])
AD <- subset(df, Diagnosis == "Probable AD")
# Shift the numeric coding of sex down by one
# (presumably factor codes 1/2 -> 0/1; TODO confirm).
AD$sex <- as.numeric(AD$sex) -1
# Global test: intercept-only null model versus every remaining column of AD
# except E4dose, Diagnosis and target.
# NOTE(review): because the null model is `E4 ~ 1`, sex is tested together with
# the other covariates rather than adjusted for; a sex-adjusted test would need
# `E4 ~ sex` as the null -- confirm which is intended.
gt.b <- globaltest::gt(E4 ~ 1, E4 ~ .-E4dose -Diagnosis -target, data = AD)
gt.b
p-value Statistic Expected Std.dev #Cov
1 0.0293 1.35 0.794 0.242 231
# Per-covariate breakdown of the binary (E4) global test result.
globaltest::covariates(gt.b)
Performing a Global Test on ApoE4 dose shows distinct effects depending on the number of ApoE4 alleles (p = 0.0199).
# Multinomial outcome: global test on ApoE4 dose within the AD subset.
# E4dose is converted to a factor before being used as the response.
AD$E4dose <- as.factor(AD$E4dose)
# Intercept-only null model versus every remaining column of AD except
# E4, Diagnosis and target.
gt.m <- globaltest::gt(E4dose ~ 1, E4dose ~ .-E4 -Diagnosis -target, data = AD)
gt.m
p-value Statistic Expected Std.dev #Cov
1 0.0199 1.22 0.8 0.169 231
# Per-covariate breakdown of the multinomial (E4dose) global test result.
globaltest::covariates(gt.m)
Metabolite-level nested linear models were fitted to test for ApoE4 dose effects. Several F-tests yielded unadjusted p-values < 0.05; however, none of them survived FDR correction.
#' Per-outcome nested-model F-tests for a grouping factor
#'
#' For every column of `Y`, fits a reduced linear model (covariates only) and a
#' full model (grouping factor + covariates) and compares the two with
#' `anova()`. The outcomes whose *unadjusted* F-test p-value is below 0.05 are
#' printed together with their FDR-adjusted p-values, and the coefficients of
#' the full model are tabulated for those outcomes.
#'
#' @param Y Data frame of outcomes, one column per metabolite.
#' @param x Grouping variable; coerced to a factor. Its first level is the
#'   reference category of the reported coefficients.
#' @param ... A single data frame of covariates; its column names become the
#'   covariate terms of both models.
#' @return A `knitr::kable` table with two rows per nominally significant
#'   outcome (estimates, then p-values) for the intercept and each
#'   non-reference level of `x`.
nested <- function(Y, x, ...) {
  alpha <- 0.05
  x <- as.factor(x)
  df <- cbind(Y, x, ...)
  n_outcomes <- ncol(Y)
  covariates <- names(...)
  covariate_terms <- paste(covariates, collapse = "+")

  # Reduced vs. full model for each outcome column. `.x` is resolved through
  # the formula environment, `x` and the covariates through `data = df`.
  F_tests <- furrr::future_map(
    df[, seq_len(n_outcomes), drop = FALSE],
    function(.x) {
      frm <- as.formula(paste0(".x ~", covariate_terms))
      rm <- lm(frm, data = df)
      ffm <- as.formula(paste0(".x ~ x +", covariate_terms))
      fm <- lm(ffm, data = df)
      anova(rm, fm)
    }
  )

  # Second row of each anova table holds the F-test of the full model.
  raw_p <- vapply(
    F_tests,
    function(tab) tab[["Pr(>F)"]][[2]],
    numeric(1)
  )
  p_values <- data.frame(p_values = unname(raw_p), row.names = colnames(Y))

  # FDR adjustment over the tests actually performed (not a fixed count).
  p_values$p_adj <- p.adjust(p_values$p_values, method = "fdr")

  # Keep the nominally significant outcomes (raw p < alpha), ordered by their
  # adjusted p-value. which() drops NA p-values instead of creating NA rows.
  sig <- p_values[which(p_values$p_values < alpha), , drop = FALSE]
  sig <- sig[order(sig$p_adj), , drop = FALSE]
  cat("Significant F-tests: \n")
  print(sig)
  cat("\nDose effects on metabolites:")

  # Refit the full model for the retained outcomes. drop = FALSE keeps a data
  # frame even when a single outcome is retained, so the map still iterates
  # over columns rather than over the elements of one column.
  n_groups <- nlevels(x)
  summaries <- furrr::future_map(
    df[, rownames(sig), drop = FALSE],
    function(.x) {
      f <- as.formula(paste0(".x ~ x +", covariate_terms))
      mdl <- lm(f, data = df)
      summary(mdl)$coefficients[seq_len(n_groups), , drop = FALSE]
    }
  )

  # Keep estimate and p-value only; one row each after transposing.
  summaries <- lapply(summaries, function(coefs) {
    t(coefs[, c("Estimate", "Pr(>|t|)"), drop = FALSE])
  })
  summaries <- plyr::ldply(summaries, id = 1:2)
  knitr::kable(summaries, table.attr = "style='width:30%;'")
}
The metabolites most affected by ApoE4 dose are D-L-3-aminoisobutyric.acid, histamine, L-serine, ceramide d18:1/24:0, lysophosphatidylcholine 18:1, phosphatidylcholine 34:4, 36:6 and 40:4, sphingomyelin d18:1/22:0 and d18:1/24:0, LpH.PGE2, cLpH.PGA2, Lyso-phosphatidic acid 20:5 and taurine.
# Outcome matrix: the first 230 columns of `df`.
Y <- df[, seq_len(230)]
# Recode ApoE4 dose (numeric code minus one) into descriptive labels and store
# it as a factor; codes other than 0/1/2 keep their character representation.
df$E4dose <- local({
  dose <- as.character(as.numeric(df$E4dose) - 1)
  dose_labels <- c("0" = "No ApoE4", "1" = "1 ApoE4", "2" = "2 ApoE4")
  recodable <- dose %in% names(dose_labels)
  dose[recodable] <- dose_labels[dose[recodable]]
  as.factor(dose)
})
E4dose <- df$E4dose
## Testing for effects of ApoE4 dose (reference category: non-carriers)
nested(Y, relevel(E4dose, ref = "No ApoE4"), clinical)
Significant F-tests:
p_values p_adj
Am.DL.3.aminoisobutyric.acid 0.019274304 0.6617562
Am.Histamine 0.010564489 0.6617562
Am.L.Serine 0.029824862 0.6617562
Lip.Cer.d18.1.24.0. 0.021905538 0.6617562
Lip.LPC.O.18.1. 0.031940407 0.6617562
Lip.PC.34.4. 0.034526409 0.6617562
Lip.PC.40.4. 0.030049244 0.6617562
Lip.SM.d18.1.22.0. 0.028799960 0.6617562
Lip.SM.d18.1.24.0. 0.032508286 0.6617562
OS.LpH.PGE2 0.033150673 0.6617562
OS.cLpH.PGA2 0.031558866 0.6617562
OS.HpH.LPA.C20.5 0.009384457 0.6617562
Am.Taurine 0.048562691 0.7013423
Lip.PC.O.36.6. 0.045023526 0.7013423
Dose effects on metabolites:
| .id | (Intercept) | x1 ApoE4 | x2 ApoE4 |
|---|---|---|---|
| Am.DL.3.aminoisobutyric.acid | -1.5490954 | 0.3318987 | 0.8154008 |
| Am.DL.3.aminoisobutyric.acid | 0.4134351 | 0.0859055 | 0.0081663 |
| Am.Histamine | 0.3195441 | -0.5659714 | -0.0265480 |
| Am.Histamine | 0.8658295 | 0.0037529 | 0.9304131 |
| Am.L.Serine | 0.0333628 | 0.3379990 | -0.3722348 |
| Am.L.Serine | 0.9851888 | 0.0658654 | 0.1990250 |
| Lip.Cer.d18.1.24.0. | -0.0417257 | 0.4270800 | 0.6754568 |
| Lip.Cer.d18.1.24.0. | 0.9820991 | 0.0251655 | 0.0252529 |
| Lip.LPC.O.18.1. | 1.8656291 | 0.4080309 | -0.2033259 |
| Lip.LPC.O.18.1. | 0.3018001 | 0.0273284 | 0.4834037 |
| Lip.PC.34.4. | 1.9741272 | 0.2164485 | 0.7565347 |
| Lip.PC.34.4. | 0.2780447 | 0.2418460 | 0.0105002 |
| Lip.PC.40.4. | 2.3598353 | 0.0213711 | 0.7079498 |
| Lip.PC.40.4. | 0.1671211 | 0.9016148 | 0.0106181 |
| Lip.SM.d18.1.22.0. | 0.7614513 | 0.4969492 | 0.4761153 |
| Lip.SM.d18.1.22.0. | 0.6893587 | 0.0111837 | 0.1216138 |
| Lip.SM.d18.1.24.0. | 1.4230810 | 0.5056344 | 0.3794747 |
| Lip.SM.d18.1.24.0. | 0.4567577 | 0.0100985 | 0.2178905 |
| OS.LpH.PGE2 | -3.3945165 | 0.4030782 | -0.2497479 |
| OS.LpH.PGE2 | 0.0697169 | 0.0344025 | 0.4040965 |
| OS.cLpH.PGA2 | -4.6375504 | 0.4328432 | -0.2178364 |
| OS.cLpH.PGA2 | 0.0164721 | 0.0272850 | 0.4789442 |
| OS.HpH.LPA.C20.5 | -0.6582008 | -0.5979650 | -0.2611871 |
| OS.HpH.LPA.C20.5 | 0.7285528 | 0.0022985 | 0.3921756 |
| Am.Taurine | -0.6684682 | 0.3270235 | -0.4141868 |
| Am.Taurine | 0.7360219 | 0.1061496 | 0.1951597 |
| Lip.PC.O.36.6. | -1.8817905 | -0.4751836 | -0.4010070 |
| Lip.PC.O.36.6. | 0.3262360 | 0.0156350 | 0.1937509 |
Among AD patients, ApoE4 dose seems to have an effect on several triglycerides, diglycerides, L-glutamic acid, L-lysine, 2- and 3-hydroxybutyric acid, glutamic acid, 3-phosphoglyceric acid, phosphatidylcholine, sphingomyelin, HpH.PAF.C16:0, phosphoethanolamine and iminodiacetate.
# ApoE4 dose effects restricted to the "Probable AD" rows.
AD <- subset(df, Diagnosis == "Probable AD")
# Outcomes: the first 230 columns of the AD subset.
ADY <- AD[, 1:230]
ADE4dose <- AD$E4dose
# Clinical covariates for the same rows.
# NOTE(review): this indexes `clinical` with a mask built from `df`, so it
# assumes both objects hold the same subjects in the same row order -- confirm.
ADclinical <- clinical[df$Diagnosis == "Probable AD",]
## Testing for effects of ApoE4 dose (reference category: "No ApoE4")
nested(ADY, relevel(ADE4dose, ref= "No ApoE4"), ADclinical)
Significant F-tests:
p_values p_adj
Lip.TG.52.3. 0.002128748 0.2496400
Lip.TG.52.4. 0.002585849 0.2496400
Lip.TG.54.5. 0.004966959 0.2496400
Lip.TG.54.6. 0.005426957 0.2496400
Lip.TG.56.6. 0.003464999 0.2496400
Lip.TG.54.4. 0.007750811 0.2845736
Lip.DG.36.3. 0.008660937 0.2845736
Am.L.Glutamic.acid 0.011127623 0.3141256
Am.L.Lysine 0.028681036 0.3141256
OA.OA01...2.hydroxybutyric.acid 0.020666789 0.3141256
OA.OA04...Glutamic.Acid 0.025302325 0.3141256
OA.OA18...3.Phosphoglyceric.acid 0.024813628 0.3141256
Lip.TG.50.2. 0.027348399 0.3141256
Lip.TG.50.3. 0.024166483 0.3141256
Lip.TG.52.2. 0.023188871 0.3141256
Lip.TG.52.5. 0.021890331 0.3141256
Lip.TG.56.7. 0.017149208 0.3141256
Lip.TG.56.8. 0.023831512 0.3141256
Lip.Cer.d18.1.24.0. 0.028242776 0.3141256
Lip.PC.O.34.3. 0.020831975 0.3141256
OS.HpH.LPA.C18.3 0.015373446 0.3141256
Lip.TG.51.3. 0.031204970 0.3262338
Lip.TG.50.1. 0.036088644 0.3451584
Lip.SM.d18.1.22.0. 0.036844964 0.3451584
OS.HpH.PAF.C16.0 0.037517223 0.3451584
OA.OA17...3.Hydroxybutyric.acid 0.039302817 0.3476788
Lip.TG.54.3. 0.041634484 0.3546641
Am.O.Phosphoethanolamine 0.043904743 0.3606461
OA.OA23...Iminodiacetate 0.047339603 0.3754520
Dose effects on metabolites:
| .id | (Intercept) | x1 ApoE4 | x2 ApoE4 |
|---|---|---|---|
| Lip.TG.52.3. | -1.4643782 | 1.2007214 | 1.1831106 |
| Lip.TG.52.3. | 0.6358058 | 0.0010862 | 0.0072690 |
| Lip.TG.52.4. | -0.1121429 | 1.0791483 | 1.0133654 |
| Lip.TG.52.4. | 0.9679217 | 0.0011411 | 0.0105351 |
| Lip.TG.54.5. | 1.2741030 | 1.0610276 | 0.8388477 |
| Lip.TG.54.5. | 0.6529190 | 0.0015469 | 0.0341720 |
| Lip.TG.54.6. | 0.2658980 | 0.9338129 | 0.9026945 |
| Lip.TG.54.6. | 0.9194230 | 0.0025351 | 0.0151562 |
| Lip.TG.56.6. | 2.5632970 | 1.2397864 | 1.0449996 |
| Lip.TG.56.6. | 0.4277371 | 0.0011921 | 0.0211104 |
| Lip.TG.54.4. | 0.2841003 | 1.1968788 | 0.7554508 |
| Lip.TG.54.4. | 0.9310659 | 0.0020188 | 0.0962076 |
| Lip.DG.36.3. | 0.0840839 | 1.1289266 | 1.0213961 |
| Lip.DG.36.3. | 0.9796341 | 0.0034506 | 0.0271826 |
| Am.L.Glutamic.acid | 1.1589949 | 1.1005617 | 0.6335507 |
| Am.L.Glutamic.acid | 0.7128623 | 0.0029035 | 0.1435525 |
| Am.L.Lysine | 0.5379920 | -0.3539418 | 0.6661742 |
| Am.L.Lysine | 0.8465875 | 0.2524606 | 0.0836013 |
| OA.OA01…2.hydroxybutyric.acid | -4.5792935 | 0.5442059 | 0.6278205 |
| OA.OA01…2.hydroxybutyric.acid | 0.0223926 | 0.0143576 | 0.0210505 |
| OA.OA04…Glutamic.Acid | -3.5427289 | 0.8275798 | 0.9267894 |
| OA.OA04…Glutamic.Acid | 0.2404107 | 0.0159397 | 0.0271758 |
| OA.OA18…3.Phosphoglyceric.acid | -11.3526089 | 1.2305158 | 1.0944053 |
| OA.OA18…3.Phosphoglyceric.acid | 0.0086309 | 0.0096998 | 0.0559774 |
| Lip.TG.50.2. | 1.0021497 | 0.6057483 | 0.9960892 |
| Lip.TG.50.2. | 0.7178487 | 0.0534302 | 0.0113252 |
| Lip.TG.50.3. | 1.8117447 | 0.8093683 | 0.9455898 |
| Lip.TG.50.3. | 0.5411158 | 0.0170936 | 0.0230086 |
| Lip.TG.52.2. | -0.8964596 | 0.8696917 | 1.0859643 |
| Lip.TG.52.2. | 0.7835104 | 0.0199221 | 0.0182605 |
| Lip.TG.52.5. | 2.2526828 | 0.8276755 | 0.8847326 |
| Lip.TG.52.5. | 0.4348213 | 0.0124079 | 0.0281487 |
| Lip.TG.56.7. | -1.7517659 | 0.9374863 | 1.1233568 |
| Lip.TG.56.7. | 0.5962265 | 0.0135900 | 0.0159981 |
| Lip.TG.56.8. | -3.4819968 | 0.8381351 | 1.0251686 |
| Lip.TG.56.8. | 0.2689616 | 0.0192008 | 0.0198447 |
| Lip.Cer.d18.1.24.0. | -0.7877149 | 0.6230035 | 1.2301173 |
| Lip.Cer.d18.1.24.0. | 0.8124950 | 0.0950419 | 0.0092203 |
| Lip.PC.O.34.3. | 2.5817961 | -0.9050800 | -0.4233676 |
| Lip.PC.O.34.3. | 0.3625664 | 0.0058104 | 0.2721552 |
| OS.HpH.LPA.C18.3 | 3.7385786 | -0.8558650 | -0.4609491 |
| OS.HpH.LPA.C18.3 | 0.1485977 | 0.0041038 | 0.1879416 |
| Lip.TG.51.3. | -0.5053142 | 0.9783986 | 0.8657830 |
| Lip.TG.51.3. | 0.8813664 | 0.0121797 | 0.0655915 |
| Lip.TG.50.1. | 1.7033554 | 0.5403834 | 0.9621863 |
| Lip.TG.50.1. | 0.5363102 | 0.0807345 | 0.0134026 |
| Lip.SM.d18.1.22.0. | 2.2055258 | 0.7500179 | 1.0318359 |
| Lip.SM.d18.1.22.0. | 0.4908551 | 0.0385733 | 0.0216149 |
| OS.HpH.PAF.C16.0 | 1.1277201 | 0.5195603 | 0.8984433 |
| OS.HpH.PAF.C16.0 | 0.6645912 | 0.0759694 | 0.0145063 |
| OA.OA17…3.Hydroxybutyric.acid | -3.9513696 | 0.0280606 | 0.8053452 |
| OA.OA17…3.Hydroxybutyric.acid | 0.1137514 | 0.9174098 | 0.0199116 |
| Lip.TG.54.3. | 0.1130591 | 1.0658881 | 0.5687422 |
| Lip.TG.54.3. | 0.9755739 | 0.0122726 | 0.2602614 |
| Am.O.Phosphoethanolamine | 3.1131060 | 0.9995290 | 0.6354866 |
| Am.O.Phosphoethanolamine | 0.3773964 | 0.0133318 | 0.1869034 |
| OA.OA23…Iminodiacetate | -2.9181098 | 0.9915980 | 0.5951219 |
| OA.OA23…Iminodiacetate | 0.4091103 | 0.0142923 | 0.2171628 |
Among individuals with SCD, lipid metabolites were not affected as much as in the AD group, with only LPC and PC showing a difference. Amino acids (glycine, taurine, L-serine, serotonin, L-glutamic acid, L-aspartic acid, phosphoethanolamine) and oxidative stress compounds were the most affected in this group.
# ApoE4 dose effects restricted to the rows whose Diagnosis is
# "Subjectieve klachten" (the SCD group discussed in the text).
SCD <- df[df$Diagnosis == "Subjectieve klachten",]
# Outcomes: the first 230 columns of the SCD subset.
SCDY <- SCD[, 1:230]
SCDE4dose <- SCD$E4dose
# Clinical covariates for the same rows.
# NOTE(review): this indexes `clinical` with a mask built from `df`, so it
# assumes both objects hold the same subjects in the same row order -- confirm.
SCDclinical <- clinical[df$Diagnosis == "Subjectieve klachten",]
## Testing for effects of ApoE4 dose (reference category: "No ApoE4")
nested(SCDY, relevel(SCDE4dose, ref= "No ApoE4"), SCDclinical)
Significant F-tests:
p_values p_adj
Am.Glycine 0.003514740 0.3473546
Am.Taurine 0.002948325 0.3473546
OS.cLpH.PGA2 0.005011479 0.3473546
OS.HpH.LPA.C18.1 0.006040949 0.3473546
OS.cHpH.aLPA.C16.1 0.007960965 0.3662044
Am.Ethanolamine 0.013509392 0.4438800
OA.OA11...Succinic.acid 0.013146702 0.4438800
Am.L.Serine 0.017478101 0.4580221
Am.Serotonine 0.017922604 0.4580221
Am.O.Phosphoethanolamine 0.021914277 0.5040284
OS.cLpH.PGF2a 0.024487586 0.5120132
OS.LpH.PGE2 0.028774008 0.5515018
Am.L.Glutamic.acid 0.037526747 0.6165108
Lip.LPC.18.1. 0.035209617 0.6165108
OS.HpH.S.1.P.C18.1 0.040427658 0.6198908
Am.L.Aspartic.acid 0.049024437 0.6411695
Lip.PC.36.3. 0.045474810 0.6411695
Dose effects on metabolites:
| .id | (Intercept) | x1 ApoE4 | x2 ApoE4 |
|---|---|---|---|
| Am.Glycine | 0.3136001 | 0.8094015 | -1.2299980 |
| Am.Glycine | 0.9288642 | 0.0049285 | 0.1199675 |
| Am.Taurine | -4.1889773 | 0.5828771 | -2.0018238 |
| Am.Taurine | 0.2317949 | 0.0374504 | 0.0121757 |
| OS.cLpH.PGA2 | -4.9708116 | 0.6421668 | -2.2210054 |
| OS.cLpH.PGA2 | 0.2239471 | 0.0488834 | 0.0167946 |
| OS.HpH.LPA.C18.1 | -2.0076802 | 0.2572539 | -2.1977631 |
| OS.HpH.LPA.C18.1 | 0.5392303 | 0.3197481 | 0.0036995 |
| OS.cHpH.aLPA.C16.1 | -0.7226914 | 0.4097244 | -1.6050636 |
| OS.cHpH.aLPA.C16.1 | 0.8058507 | 0.0814836 | 0.0170329 |
| Am.Ethanolamine | -7.6872306 | 0.4741103 | -1.2610213 |
| Am.Ethanolamine | 0.0107470 | 0.0434221 | 0.0565401 |
| OA.OA11…Succinic.acid | -2.6309618 | 0.7057740 | -0.7535436 |
| OA.OA11…Succinic.acid | 0.4269017 | 0.0087414 | 0.3075400 |
| Am.L.Serine | -0.1446469 | 0.4478211 | -1.5500162 |
| Am.L.Serine | 0.9648177 | 0.0876696 | 0.0375814 |
| Am.Serotonine | -5.4398488 | 0.6027268 | -1.0048081 |
| Am.Serotonine | 0.0966104 | 0.0211071 | 0.1661665 |
| Am.O.Phosphoethanolamine | -7.2865376 | 0.5261461 | -1.7376425 |
| Am.O.Phosphoethanolamine | 0.0639237 | 0.0888567 | 0.0476605 |
| OS.cLpH.PGF2a | -4.0168137 | 0.5612769 | -1.6570842 |
| OS.cLpH.PGF2a | 0.3108338 | 0.0754429 | 0.0633847 |
| OS.LpH.PGE2 | -4.0593930 | 0.5430712 | -1.7009626 |
| OS.LpH.PGE2 | 0.3164788 | 0.0920321 | 0.0624149 |
| Am.L.Glutamic.acid | -2.7679866 | 0.1140449 | -1.9281177 |
| Am.L.Glutamic.acid | 0.4226440 | 0.6744710 | 0.0144904 |
| Lip.LPC.18.1. | -4.5454870 | 0.5561727 | -1.0740727 |
| Lip.LPC.18.1. | 0.1905223 | 0.0445592 | 0.1650333 |
| OS.HpH.S.1.P.C18.1 | -0.3190975 | 0.3557437 | -1.5115974 |
| OS.HpH.S.1.P.C18.1 | 0.9242884 | 0.1827166 | 0.0472202 |
| Am.L.Aspartic.acid | -0.0247712 | -0.1948346 | -1.9304107 |
| Am.L.Aspartic.acid | 0.9943290 | 0.4793053 | 0.0155843 |
| Lip.PC.36.3. | -1.2965704 | 0.5594230 | -0.4703524 |
| Lip.PC.36.3. | 0.6698649 | 0.0227875 | 0.4877525 |
The discriminative potential of the metabolites, correcting for clinical background features, with respect to AD and ApoE4 status (4-class response: ADE4, AD, SCDE4, SCD) is assessed by first fitting a multinomial logistic regression (benchmark model). The benchmark model fits only the clinical features. The model’s AUC is compared with that of the same model (with altered hyperparameters) fitted on the clinical variables + 230 metabolites. The metabolites are then projected onto 6 latent factors estimated by maximum likelihood (ML), and 3 more models are fitted on the clinical variables + 6 factors: multinomial logistic regression, decision tree and XGBoost. The models’ multi-class classification performance is compared using confusion matrices and AUCs.
#' Train a caret classifier and summarise its resampled predictions
#'
#' Fits `model` with `caret::train()` (optimising "logLoss"), then builds a
#' confusion matrix and multi-class ROC curves from the predictions stored in
#' the fitted object.
#'
#' @param X Predictors (data frame or matrix), one column per feature.
#' @param y Class labels, one per row of `X`.
#' @param model caret method name, e.g. "multinom".
#' @param ctrl A `trainControl()` object. It has to keep the resampled
#'   predictions (e.g. `savePredictions = "final"`), because the confusion
#'   matrix and the ROC curves are computed from them.
#' @param grid Tuning grid passed to `tuneGrid`.
#' @param seed Seed set before training.
#' @param ... Further arguments forwarded to `caret::train()`.
#' @return A list with `cm` (confusion matrix), `roc` (multi-class ROC object)
#'   and `model` (the fitted caret object).
multifit <- function(X,
                     y,
                     model,
                     ctrl = NULL,
                     grid = NULL,
                     seed = 87654, ...) {
  set.seed(seed)
  # Merge X and y into one data frame
  df <- cbind.data.frame(X, y)
  # Train the model; drop = FALSE keeps a data frame for a single predictor
  mdl <- caret::train(df[, seq_len(ncol(X)), drop = FALSE], df$y,
    method = model,
    tuneGrid = grid,
    trControl = ctrl,
    metric = "logLoss",
    ...
  )
  # Everything below needs the stored hold-out predictions.
  if (is.null(mdl$pred)) {
    stop(
      "No resampled predictions were stored; pass a trainControl() with ",
      "savePredictions = \"final\" as `ctrl`.",
      call. = FALSE
    )
  }
  # Confusion matrix and performance metrics from caret
  obs <- mdl$pred$obs
  preds <- mdl$pred$pred
  cm <- confusionMatrix(reference = obs, data = preds, mode = "everything")
  # Multi-class ROC curves on the integer-coded class labels.
  # NOTE(review): the predictor is the predicted class label, not a class
  # probability -- confirm this is the intended AUC definition.
  ys <- as.numeric(obs) - 1
  yhats <- as.numeric(preds) - 1
  roc <- multiclass.roc(
    response = ys, predictor = yhats,
    quiet = TRUE, legacy.axes = TRUE
  )
  list("cm" = cm, "roc" = roc, "model" = mdl)
}
# httpgd::hgd()
#' Print and plot the performance of a fitted `multifit()` result
#'
#' Prints the multi-class AUC and the confusion matrix, then draws the
#' pairwise ROC curves as an interactive plot.
#'
#' @param model A list with elements `roc` and `cm`, as returned by `multifit()`.
#' @return The plotly object produced by `ggplotly()`.
evaluate <- function(model) {
  print(model$roc$auc)
  cat("\n")
  print(model$cm)
  # Label each pairwise curve from the actual class order. The ROC response is
  # the integer coding of the class factor, so the pairwise curves follow the
  # combn() order of the class levels shown in the confusion matrix.
  class_levels <- colnames(model$cm$table)
  pair_labels <- utils::combn(class_levels, 2, FUN = paste, collapse = "-")
  if (length(pair_labels) != length(model$roc$rocs)) {
    stop(
      "Number of pairwise ROC curves does not match the number of class pairs.",
      call. = FALSE
    )
  }
  names(model$roc$rocs) <- pair_labels
  g <- ggroc(model$roc$rocs, legacy.axes = TRUE) +
    scale_color_tableau() +
    theme_tufte() +
    guides(color = guide_legend(title = "ROC curves"))
  ggplotly(g)
}
# Reload the data and rebuild the modelling inputs.
load("data/data.Rdata")
X <- scale(df[, 1:230])
y <- df$target
# Metabolites plus the clinical dummy variables.
Xclin <- cbind.data.frame(X, clin_dummy)

# Resampling scheme shared by all models: 10-fold CV repeated 100 times,
# SMOTE sampling, final hold-out predictions and class probabilities kept.
ctrl <- trainControl(
  method = "repeatedcv", number = 10, repeats = 100,
  savePredictions = "final", classProbs = TRUE,
  summaryFunction = multiClassSummary, selectionFunction = best,
  search = "random", sampling = "smote"
)

# Benchmark: multinomial logistic regression on the clinical features only.
benchmark <- multifit(
  X = clin_dummy, y = y,
  model = "multinom", ctrl = ctrl,
  grid = expand.grid(decay = 0),
  trace = FALSE
)
evaluate(benchmark)
Multi-class area under the curve: 0.8143
Confusion Matrix and Statistics
Reference
Prediction AD ADE4 SCD SCDE4
AD 4756 2251 2 102
ADE4 3736 904 31 119
SCD 34 199 1892 4158
SCDE4 74 46 2075 4321
Overall Statistics
Accuracy : 0.4807
95% CI : (0.4744, 0.4869)
No Information Rate : 0.3522
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.2972
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: AD Class: ADE4 Class: SCD Class: SCDE4
Sensitivity 0.5530 0.2659 0.4730 0.4967
Specificity 0.8537 0.8176 0.7879 0.8628
Pos Pred Value 0.6688 0.1887 0.3011 0.6631
Neg Pred Value 0.7815 0.8746 0.8855 0.7592
Precision 0.6688 0.1887 0.3011 0.6631
Recall 0.5530 0.2659 0.4730 0.4967
F1 0.6054 0.2208 0.3680 0.5680
Prevalence 0.3482 0.1377 0.1619 0.3522
Detection Rate 0.1926 0.0366 0.0766 0.1749
Detection Prevalence 0.2879 0.1939 0.2544 0.2638
Balanced Accuracy 0.7034 0.5417 0.6304 0.6797
# Multinomial logistic regression on clinical features + all metabolites,
# with weight decay 10.
mlr <- multifit(
  X = Xclin, y = y,
  model = "multinom", ctrl = ctrl,
  grid = expand.grid(decay = 10),
  trace = FALSE
)
evaluate(mlr)
Multi-class area under the curve: 0.8338
Confusion Matrix and Statistics
Reference
Prediction AD ADE4 SCD SCDE4
AD 5764 2368 0 5
ADE4 2829 966 1 42
SCD 4 6 1659 2836
SCDE4 3 60 2340 5817
Overall Statistics
Accuracy : 0.5751
95% CI : (0.5689, 0.5813)
No Information Rate : 0.3522
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4076
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: AD Class: ADE4 Class: SCD Class: SCDE4
Sensitivity 0.6702 0.28412 0.41475 0.6686
Specificity 0.8526 0.86516 0.86251 0.8498
Pos Pred Value 0.7084 0.25169 0.36826 0.7077
Neg Pred Value 0.8288 0.88333 0.88408 0.8251
Precision 0.7084 0.25169 0.36826 0.7077
Recall 0.6702 0.28412 0.41475 0.6686
F1 0.6888 0.26692 0.39012 0.6876
Prevalence 0.3482 0.13765 0.16194 0.3522
Detection Rate 0.2334 0.03911 0.06717 0.2355
Detection Prevalence 0.3294 0.15538 0.18239 0.3328
Balanced Accuracy 0.7614 0.57464 0.63863 0.7592
#' Project features onto maximum-likelihood latent factors
#'
#' Standardises `X`, removes redundant features, estimates a regularised
#' correlation matrix, fits an `m`-factor model to it and returns the factor
#' scores of the retained features.
#'
#' @param X Numeric matrix or data frame of features (rows = observations).
#' @param m Number of latent factors to extract.
#' @param seed Seed set before the procedure starts.
#' @return The factor scores returned by `facScore()`.
project <- function(X, m, seed) {
  set.seed(seed)
  X <- scale(as.matrix(X))
  feature_cor <- cor(X)
  # Find redundant features
  keep <- RF(feature_cor)
  # Filter out redundant features
  filtered <- subSet(X, keep)
  # Regularized correlation matrix of the filtered dataset
  M <- regcor(filtered)
  R <- M$optCor
  # Factor model with the requested number of factors
  mlfa <- mlFA(R, m = m)
  facScore(filtered, mlfa$Loadings, mlfa$Uniqueness)
}
# Factor scores for a 6-factor projection (seeded for reproducibility).
thomson <- project(X, 6, seed = 1234)
# From here on X holds the clinical dummies plus the factor scores;
# it replaces the metabolite matrix previously stored in X.
X <- cbind(clin_dummy, thomson)
# Multinomial logistic regression on clinical features + latent factors,
# without weight decay.
mlrf <- multifit(
X = X,
y = y,
model = "multinom",
ctrl = ctrl,
trace = FALSE,
grid = expand.grid(decay = 0)
)
evaluate(mlrf)
Multi-class area under the curve: 0.8178
Confusion Matrix and Statistics
Reference
Prediction AD ADE4 SCD SCDE4
AD 5047 2100 4 91
ADE4 3344 941 60 202
SCD 115 215 1998 3271
SCDE4 94 144 1938 5136
Overall Statistics
Accuracy : 0.5313
95% CI : (0.525, 0.5375)
No Information Rate : 0.3522
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.3593
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: AD Class: ADE4 Class: SCD Class: SCDE4
Sensitivity 0.5869 0.2768 0.49950 0.5903
Specificity 0.8637 0.8307 0.82604 0.8640
Pos Pred Value 0.6969 0.2069 0.35685 0.7024
Neg Pred Value 0.7965 0.8780 0.89519 0.7950
Precision 0.6969 0.2069 0.35685 0.7024
Recall 0.5869 0.2768 0.49950 0.5903
F1 0.6372 0.2368 0.41629 0.6415
Prevalence 0.3482 0.1377 0.16194 0.3522
Detection Rate 0.2043 0.0381 0.08089 0.2079
Detection Prevalence 0.2932 0.1841 0.22668 0.2960
Balanced Accuracy 0.7253 0.5537 0.66277 0.7272
# Decision tree (caret method "rpart2") on the current X, with the maximum
# depth fixed at 3.
tree <- multifit(
X = X,
y = y,
model = "rpart2",
ctrl = ctrl,
grid = expand.grid(maxdepth=3)
)
evaluate(tree)
Multi-class area under the curve: 0.8176
Confusion Matrix and Statistics
Reference
Prediction AD ADE4 SCD SCDE4
AD 5578 1904 56 95
ADE4 2692 1241 58 155
SCD 182 153 2704 5420
SCDE4 148 102 1182 3030
Overall Statistics
Accuracy : 0.5082
95% CI : (0.502, 0.5145)
No Information Rate : 0.3522
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.3445
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: AD Class: ADE4 Class: SCD Class: SCDE4
Sensitivity 0.6486 0.36500 0.6760 0.3483
Specificity 0.8724 0.86362 0.7220 0.9105
Pos Pred Value 0.7308 0.29932 0.3197 0.6791
Neg Pred Value 0.8229 0.89496 0.9202 0.7198
Precision 0.7308 0.29932 0.3197 0.6791
Recall 0.6486 0.36500 0.6760 0.3483
F1 0.6872 0.32892 0.4341 0.4604
Prevalence 0.3482 0.13765 0.1619 0.3522
Detection Rate 0.2258 0.05024 0.1095 0.1227
Detection Prevalence 0.3090 0.16785 0.3425 0.1806
Balanced Accuracy 0.7605 0.61431 0.6990 0.6294
# Gradient-boosted trees (caret method "xgbTree") on the current X.
# NOTE(review): `xgb.grid` is not defined in the visible part of the file;
# presumably a tuning grid created elsewhere -- confirm.
xgb <- multifit(
X = X,
y = y,
model = "xgbTree",
ctrl = ctrl,
grid = xgb.grid
)
evaluate(xgb)
Multi-class area under the curve: 0.8355
Confusion Matrix and Statistics
Reference
Prediction AD ADE4 SCD SCDE4
AD 6432 2000 19 3
ADE4 2079 1263 35 184
SCD 24 6 1559 3086
SCDE4 65 131 2387 5427
Overall Statistics
Accuracy : 0.5944
95% CI : (0.5882, 0.6005)
No Information Rate : 0.3522
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4336
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: AD Class: ADE4 Class: SCD Class: SCDE4
Sensitivity 0.7479 0.37147 0.38975 0.6238
Specificity 0.8744 0.89211 0.84947 0.8386
Pos Pred Value 0.7608 0.35468 0.33348 0.6775
Neg Pred Value 0.8666 0.89891 0.87810 0.8039
Precision 0.7608 0.35468 0.33348 0.6775
Recall 0.7479 0.37147 0.38975 0.6238
F1 0.7543 0.36288 0.35942 0.6496
Prevalence 0.3482 0.13765 0.16194 0.3522
Detection Rate 0.2604 0.05113 0.06312 0.2197
Detection Prevalence 0.3423 0.14417 0.18927 0.3243
Balanced Accuracy 0.8112 0.63179 0.61961 0.7312
Observations:
Adding serum metabolite information (either the full 230-metabolite matrix or its 6-factor projection) seems to increase the discriminatory power of the models.
Fitting 6 ML-estimated factors obtained with the FMradio package (cumulatively explaining 30% of the variance) yields increased classification performance, making this a valuable dimension reduction technique for high-dimensional data.
Looking at the confusion matrix and individual ROC curves, all models were able to discriminate better among certain classes (AD+E4/SCD+E4, AD+E4/SCD, AD-E4/SCD+E4 and AD-E4/SCD-E4) compared to others (AD+E4/AD-E4 and SCD+E4/SCD-E4).
# One-column table of the multi-class AUC of each fitted model
# (row names identify the model).
aucs <- data.frame(
  AUC = vapply(
    list(
      "Clinical features only" = benchmark,
      "Clinical features + 230 metabolites" = mlr,
      "Clinical features + 6 latent factors" = mlrf,
      "Decision Tree" = tree,
      "XGBoost" = xgb
    ),
    function(fit) as.numeric(fit$roc$auc),
    numeric(1)
  )
)
knitr::kable(aucs)
| AUC | |
|---|---|
| Clinical features only | 0.8143379 |
| Clinical features + 230 metabolites | 0.8338068 |
| Clinical features + 6 latent factors | 0.8177738 |
| Decision Tree | 0.8176124 |
| XGBoost | 0.8355013 |
The covariance network analysis shows distinct metabolic “images” among ApoE4 carriers and non-carriers in AD. The network vertices and edges are distinct.
# Standardised metabolite data of ApoE4 non-carriers (C1) and carriers (C2).
# NOTE(review): the text and plot titles refer to AD patients only; `df` is
# not restricted to AD in the visible code -- confirm it was subset beforehand.
C1 <- scale(df[df$E4 == 0, 1:230])
C2 <- scale(df[df$E4 == 1, 1:230])
# Class-specific covariance matrices
S1 <- covML(C1)
S2 <- covML(C2)
# Store them in a list
S <- list(S1 = S1, S2 = S2)
# Number of samples per class. This is taken from the data matrices: the
# covariance matrices have one row per variable, not per sample.
n <- c(nrow(C1), nrow(C2))
# Default target matrices (type "DUPV") for the fused ridge estimation
Ts <- default.target.fused(Slist = S, ns = n, type = "DUPV")
# Get the optimal lambdas per class and fused with 10-fold CV
# set.seed(8910)
# optf <- optPenalty.fused(
# Ylist = Ys,
# Tlist = Ts,
# lambda = default.penalty(Ys),
# cv.method = "kCV",
# k = 10,
# verbose = FALSE
# )
# save(optf, file= "data/optf.Rdata")
i = 1 | max diff = 1.1226918030e+00
i = 2 | max diff = 1.2908036134e-28
Converged in 2 iterations, max diff < 1.49e-08.
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting
- Retained elements: 343
- Corresponding to 1.3 % of possible edges
Step 1... determine cutoff point
Step 2... estimate parameters of null distribution and eta0
Step 3... compute p-values and estimate empirical PDF/CDF
Step 4... compute q-values and local fdr
Step 5... prepare for plotting
- Retained elements: 376
- Corresponding to 1.43 % of possible edges
GGMs showing distinct metabolic compositions among ApoE4 carriers (left) and non-carriers (middle), and the differential edges (right).
# Bring the two sparse partial-correlation matrices onto a common vertex set
TST <- Union(P0s$S1$sparseParCor, P0s$S2$sparseParCor)
PCE4NO <- TST$M1subset
PCE4YES <- TST$M2subset

# Vertex colour per metabolite class, keyed on a pattern in the row name.
# Patterns are applied in order, so a later match overrides an earlier one;
# names matching no pattern keep the name itself as their entry.
Colors <- local({
  class_colour <- c(Am = "pink", OA = "lightblue", Lip = "yellow", OS = "purple")
  vertex_names <- rownames(PCE4YES)
  colour <- vertex_names
  for (pattern in names(class_colour)) {
    colour[grepl(pattern, vertex_names)] <- class_colour[[pattern]]
  }
  colour
})

set.seed(111213)
# Network of ApoE4 carriers; its layout coordinates are reused below
Coords <- Ugraph(
  PCE4YES,
  type = "fancy", lay = "layout_with_fr",
  Vcolor = Colors, prune = FALSE, Vsize = 10, Vcex = 0.4,
  main = "ApoE4 carriers with AD"
)
# Network of ApoE4 non-carriers, drawn on the carriers' layout
Ugraph(
  PCE4NO,
  type = "fancy", lay = NULL, coords = Coords,
  Vcolor = Colors, prune = FALSE, Vsize = 10, Vcex = 0.4,
  main = "ApoE4 non-carriers with AD"
)
# Differential network, same layout
DiffGraph(
  PCE4NO, PCE4YES,
  lay = NULL, coords = Coords,
  Vcolor = Colors, Vsize = 10, Vcex = 0.4,
  main = "Differential Network"
)

# Network statistics for both classes: columns 1-9 belong to the first list
# element (non-carriers), columns 10-18 to the second (carriers)
PC0list <- list(PCE4NO = PCE4NO, PCE4YES = PCE4YES)
NetStats <- GGMnetworkStats.fused(PC0list)
NetStatsE4yes <- NetStats[, 10:18]
NetStatsE4no <- NetStats[, 1:9]
# Drop the 3rd and 9th statistic from each class
NetStatsE4no[, c(3, 9)] <- NULL
NetStatsE4yes[, c(3, 9)] <- NULL
# # Plot the densities of centrality degree scores
# plot(density(DegreesAD1[, 2]),
# col = "blue", xlim = c(-1, 8), xlab = "Degree", main = ""
# )
# lines(density(DegreesAD2[, 2]),
# col = "red"
# )
# legenda <- c("AD class 1", "AD class 2")
# legend(5, 0.5,
# legend = legenda,
# lwd = rep(1, 2), lty = rep(1, 2), col = c("blue", "red"), cex = 0.7
# )
Paired, one-sided Wilcoxon signed-rank test (carriers > non-carriers) between the network statistics of ApoE4 carriers and non-carriers. The test shows distinct centrality degrees and numbers of negative and positive edges.
# Paired, one-sided Wilcoxon signed-rank test per network statistic
# (alternative: carriers greater than non-carriers)
w <- furrr::future_map2(
  NetStatsE4yes[, 1:7],
  NetStatsE4no[, 1:7],
  function(.x, .y) {
    wilcox.test(.x, .y, paired = TRUE, alternative = "greater")
  }
)
# Collect the p-values in a list named after the tested statistics
p.values <- lapply(w, function(test) test[["p.value"]])
names(p.values) <- names(w)
# Show the statistics with p < 0.05
p.values[p.values < 0.05]
$PCE4YES.degree
[1] 0.001016491
$PCE4YES.nNeg
[1] 0.007252307
$PCE4YES.nPos
[1] 0.02813907
Distinct metabolite communities among ApoE4 carriers/non carriers
# Get the communities per class.
# The seed is set once, before both calls, so the call order matters for
# reproducibility.
set.seed(141516)
# Communities in the non-carrier network
CommC1 <- Communities(PCE4NO,
Vcolor = Colors,
Vsize = 10, Vcex = 0.5, main = "ApoE4 non-carriers"
)
# Communities in the carrier network
CommC2 <- Communities(PCE4YES,
Vcolor = Colors,
Vsize = 10, Vcex = 0.5, main = "ApoE4 carriers"
)